#######################################################
# Script: Snelle naturalisatie bevordert kans op baan #
# Maarten Vink, Floris Peters & Hans Schmeets         #
# Economisch Statistische Berichten                   #
#######################################################


################
# Introduction #
################

# This file provides the syntax used to create all the tables and figures in the paper. 
# The dataset contains sensitive, micro-level information. For privacy reasons, the data are therefore only available to individuals employed at or affiliated with Statistics Netherlands.
# The dataset can be found at the following location on the network of Statistics Netherlands: \\cbsp.nl\Productie\Projecten\SAL\209253UM_FP_SEC1\Werk\Floris\PhD\ESB_employment 

#######################################################################################################################
#######################################################################################################################

#############
# Variables #
#############

# ID
# (Individual identification number)
 
# EMPLOYMENT
# (Employment status)
# [0] Not employed; 
# [1] Employed
 
# NATURALIZATION
# (Possession of Dutch citizenship)
# [0] Not naturalized; 
# [1] Naturalized

# NATURALIZATION_CAT
# (Moment of naturalization, categorical)
# [1] No naturalization;
# [2] >3 years prior to naturalization;
# [3] 3 years prior to naturalization; 
# [4] 2 years prior to naturalization;
# [5] 1 year prior to naturalization;
# [6] Year of naturalization;
# [7] 1 year after naturalization;
# [8] 2 years after naturalization;
# [9] 3 years after naturalization;
# [10] >3 years after naturalization

# YSN
# (Years since naturalization)

# IMMIGRATIONYEAR
# (Migration cohorts - Year of first immigration to the Netherlands)

# GENDER
# [1] Male; 
# [2] Female

# AGEARRIVAL
# (Age at the moment of migration)

# AGECAT 
# (Age at the moment of migration in categories) 
# [1] 20-24 years; 
# [2] 25-29 years; 
# [3] 30-34 years; 
# [4] 35-39 years; 
# [5] 40-44 years; 
# [6] 45-50 years

# RESIDENCE
# (Years since migration)

# RESIDENCECAT
# (Years since migration in categories)
# [1] 0-1 years; 
# [2] 2-3 years; 
# [3] 4-5 years; 
# [4] 6-7 years; 
# [5] 8-9 years

# PARTNER
# [1] No partner; 
# [2] Foreign born foreign partner; 
# [3] Foreign born Dutch partner; 
# [4] Native Dutch partner

# CHILD
# (Children in the household in categories)
# [0] No children <18 in household; 
# [1] Children <18 in household

# EU
# [0] Not EU country of origin; 
# [1] EU country of origin
  
# NATURALIZATIONSPEED
# (Speed of naturalization)
# [0] No naturalization;
# [1] 1-3 years;
# [2] 4 years;
# [3] 5 years;
# [4] 6 years;
# [5] 7 years;
# [6] 8-10 years

#######################################################################################################################
#######################################################################################################################

#load packages
library(miceadds)

# Load the main dataset. The file is chosen interactively via file.choose()
# and read as a semicolon-separated CSV whose first row holds column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
dataset_main <- read.csv(file.choose(), header = TRUE, sep = ";")


###########     
# Table 1 #
###########  

# Split the main dataset by gender (GENDER: 1 = male, 2 = female).
# which() discards rows where the comparison is NA, so only rows that
# positively match the gender code are kept.
dataset_male <- dataset_main[which(dataset_main$GENDER == 1), , drop = FALSE]
dataset_female <- dataset_main[which(dataset_main$GENDER == 2), , drop = FALSE]

# Descriptive statistics for Table 1.
# Cross-tabulations use margin 1 (row proportions): the employment
# distribution within each naturalization-speed category.
# The overall employment distribution must be tabulated first: calling
# prop.table() on the raw vector divides every observation by the column sum
# instead of returning the share of each employment status.
prop.table(table(dataset_male$NATURALIZATIONSPEED, dataset_male$EMPLOYMENT), 1)
prop.table(table(dataset_male$EMPLOYMENT))
prop.table(
  table(dataset_female$NATURALIZATIONSPEED, dataset_female$EMPLOYMENT), 1
)
prop.table(table(dataset_female$EMPLOYMENT))

# Sample sizes per gender: number of observations (rows) followed by the
# number of distinct individuals (unique ID values).
nrow(dataset_male)
sum(!duplicated(dataset_male$ID))
nrow(dataset_female)
sum(!duplicated(dataset_female$ID))


###########     
# Table 2 #
###########  

# Create employment lags (employment status in the person's previous row).
# With only miceadds attached, lag() resolves to stats::lag(), which ignores
# `n`, merely changes the time-series attribute and leaves the values
# unshifted; it also knows nothing about person boundaries. The lag is
# therefore computed explicitly within each ID, so the first observation of
# every person is NA rather than the previous person's last value.
# NOTE(review): assumes rows are already in chronological order within ID
# (e.g. ascending RESIDENCE) - confirm, or sort the data before this step.
lag_by_id <- function(x, id) {
  # Shift the values one position down within each id group.
  ave(x, id, FUN = function(v) {
    if (length(v) == 0L) {
      return(v)
    }
    c(NA, v[-length(v)])
  })
}

dataset_male$EMPLOYMENTLAG <- lag_by_id(dataset_male$EMPLOYMENT,
                                        dataset_male$ID)
dataset_female$EMPLOYMENTLAG <- lag_by_id(dataset_female$EMPLOYMENT,
                                          dataset_female$ID)

# Logistic regression analyses for Table 2, estimated separately for men
# (result_Table_2_1) and women (result_Table_2_2).
# stats::glm() has no `cluster` argument: it is forwarded to glm.control()
# and fails with "unused argument". The models are therefore fitted with
# miceadds::glm.cluster(), which fits the same binomial glm and computes
# standard errors clustered on the person identifier ID.
# Use summary() on the results to obtain the cluster-robust inference.
result_Table_2_1 <- miceadds::glm.cluster(
  data = dataset_male,
  formula = EMPLOYMENT ~ as.factor(NATURALIZATIONSPEED) + AGEARRIVAL +
    NATURALIZATION:RESIDENCE + as.factor(PARTNER) + as.factor(CHILD) +
    as.factor(EU),
  cluster = "ID",
  family = "binomial"
)
result_Table_2_2 <- miceadds::glm.cluster(
  data = dataset_female,
  formula = EMPLOYMENT ~ as.factor(NATURALIZATIONSPEED) + AGEARRIVAL +
    NATURALIZATION:RESIDENCE + as.factor(PARTNER) + as.factor(CHILD) +
    as.factor(EU),
  cluster = "ID",
  family = "binomial"
)

